COMPARING RAPIDEYE AND LANDSAT 8 OLI ACCURACIES

MAP TREE ATTRIBUTES USING RapidEye IMAGE AND kNN METHOD

##########################
#SET WORKING DIRECTORY
##########################

# clean the working environment and graphics
rm(list=ls()); graphics.off()

# specify path to your working directory
setwd("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data")
getwd()
## [1] "C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data"
#Set path for packages installation and load the packages
.libPaths("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3")

library(sp)
library(rgdal)
library(raster)
library(yaImpute)
library(fastICA)



##########################
#RUN FUNCTIONS
##########################

#####
# Function for accuracy assessment
#####

print.accuracy <- function(obs, fit, main) {
  print(main)
  res <- fit-obs
  bias <- signif(mean(res), 3); cat("BIAS:", bias)
  bias.r <- signif(bias/mean(obs)*100, 3); cat( " (",bias.r,"%)\n", sep="")
  RMSE <- signif(sqrt(sum((res^2)/length(res))), 3); cat("RMSE:",RMSE)
  RMSE.r <- signif(RMSE/mean(obs)*100, 3); cat( " (",RMSE.r,"%)\n", sep="")
  R2 <- signif(cor(obs,fit,method="pearson")^2, 3); #pseudo R^2
  cat("Pseudo R.squared: ", R2,"\n\n", sep="")
  ret <- c(bias, bias.r, RMSE, RMSE.r, R2)
}


#####
# Function for leave-one-out cross-validation
#####

cv.plot <- function(data, yvar, xvars, k, yai.method) {
  pred <- numeric(nrow(data)) #empty vector for results
  for(i in 1:nrow(data)){
    ref <- which(row.names(data)!=i)
    val <- which(row.names(data)==i)
    x <- as.data.frame(data[,xvars])
    y <- as.data.frame(data[ref, yvar])
    names(y) <- yvar
    row.names(y) <- row.names(data)[-val]
    knn <- yai(x=x, y=y, k=k, data=data, method=yai.method)
    imp <- impute(knn, k=k, method="dstWeighted", ancillaryData=y)
    pred[val] <- imp[as.numeric(row.names(imp)) %in% val, yvar]
  }
  
  #print results
  res <- print.accuracy(data[,yvar], pred, yvar)
  xlim <- max(data[,yvar])
  plot(data[,yvar], pred, main=yvar, xlab="Observed", ylab="Predicted",
       xlim=c(0,xlim), ylim=c(0,xlim))
  abline(0,1)
  return(res)
}

##########################
#LOAD INPUT DATA 
##########################

# load required field data
load("Kou_input_plots.rda")   #"Plots" are required because of coordinates
load("PlotData_Kou.rda")      #Plot-level results

RAPIDEYE IMAGE

’

# "re_refl.tiff" is a spatial subset of Rapid Eye satellite image
# acquired 11 October 2013

# load multiband satellite image stack
rapideye<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/re_refl.tif")
rapideye#see properties of RasterStack object
## class       : RasterStack 
## dimensions  : 3510, 3720, 13057200, 5  (nrow, ncol, ncell, nlayers)
## resolution  : 5, 5  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5 
## min values  :        11,       362,       411,       827,       922 
## max values  :      6928,      7321,      7326,      7059,      7456
# aggregare ERapid EYe
rapideye.30m<-aggregate(rapideye, fact=6, fun=mean)
rapideye.30m
## class       : RasterBrick 
## dimensions  : 585, 620, 362700, 5  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : C:\Users\oyelo\AppData\Local\Temp\Rtmpmg7tL3\raster\r_tmp_2017-04-11_065243_11012_17175.grd 
## names       : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5 
## min values  :  182.2500,  522.5000,  552.5278, 1039.5556, 1092.5278 
## max values  :  2946.083,  3598.556,  4071.917,  4346.778,  5084.139
#remove unnecessary files
rm(rapideye)

# the image has five spectral bands corresponding to blue, green, red, red edge
# near infrared (NIR)  parts of the electromagnetic
# spectrum

# rename bands
bands_re <- c("b.re","g.re","r.re","rededge","nir.re")
names(rapideye.30m) <- bands_re

# visualize the image
plotRGB(rapideye.30m, r="r.re", g="g.re", b="b.re", stretch="lin")

# the image covers 12 km x 12 km area around sentinel site mid-point
# this is so called "true-color" composite as it best corresponds to colors seen by eye

# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(rapideye.30m, r="nir.re", g="r.re", b="g.re", stretch="lin")

LANDSAT IMAGE

# "Landsat.tiff" is a spatial subset of Landsat 8 OLI satellite image
# acquired 11 October 2013

# load multiband satellite image stack
landsat<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/ls8_sr_2014_02_16.tif")
landsat#see properties of RasterStack object
## class       : RasterStack 
## dimensions  : 585, 620, 362700, 6  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       : ls8_sr_2014_02_16.1, ls8_sr_2014_02_16.2, ls8_sr_2014_02_16.3, ls8_sr_2014_02_16.4, ls8_sr_2014_02_16.5, ls8_sr_2014_02_16.6 
## min values  :                 484,                 726,                 876,                1614,                2005,                1420 
## max values  :                1578,                2298,                3041,                3977,                7361,               13529
# the image has six spectral bands corresponding to blue, green, red, 
# near infrared (NIR) and short waveinfrared (SWIR) parts of the electromagnetic
# spectrum

# rename bands
bands_l8 <- c("b","g","r","nir","swir1","swir2")
names(landsat) <- bands_l8

# visualize the image
plotRGB(landsat, r="r", g="g", b="b", stretch="lin")

# the image covers 12 km x 12 km area around sentinel site mid-point
# this is so called "true-color" composite as it best corresponds to colors seen by eye

# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(landsat, r="nir", g="r", b="g", stretch="lin")

JOIN FIELD DATA WITH COORDINATES FOR RAPIDEYE

# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)

# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
                                                  "+proj=utm +zone=30 ellps=WGS84"))

# plot image with plots
plotRGB(rapideye.30m, r=4, g=3, b=2, stretch="lin")
points(data$utm_x, data$utm_y, pch=19, col="yellow")

JOIN FIELD DATA WITH COORDINATES FOR LANDSAT8

# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)

# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
                                                  "+proj=utm +zone=30 ellps=WGS84"))

# plot image with plots
plotRGB(landsat, r=4, g=3, b=2, stretch="lin")
points(data$utm_x, data$utm_y, pch=19, col="yellow")

EXTRACT PIXEL-VALUES FROM RAPIDEYE IMAGE

# for further analysis, we need to extract pixel values for the plots
data[bands_re] <- as.data.frame(extract(rapideye.30m, cbind(data$utm_x, data$utm_y),
                                        method='bilinear'))

# see how data looks now
head(data)
##   plot.id cluster plot count.ha    ba.ha    agb.ha   bgb.ha   agc.ha
## 1     101       1    1      120 2.774560  8.565222 2.355436 4.025654
## 2     102       1    2       20 1.517970  7.105600 1.954040 3.339632
## 3     103       1    3      240 2.021866  5.727592 1.575088 2.691968
## 4     104       1    4       20 2.202256 11.602759 3.190759 5.453297
## 5     105       1    5       30 2.387948 10.386064 2.856168 4.881450
## 6     106       1    6       40 3.362431 15.486342 4.258744 7.278581
##      bgc.ha      v.ha  dbh.mean dbh.qdr.mean    h.mean  h.loreys
## 1 1.1070550  7.449620 16.683333     18.56205  6.231667  6.392793
## 2 0.9183987  6.245248 30.900000     31.63927  9.350000  9.795736
## 3 0.7402913  4.979081  8.583333     16.99223  3.882083  5.863372
## 4 1.4996566 10.312879 37.400000     37.57286 10.900000 11.149689
## 5 1.3423988  9.120726 31.500000     32.84413  8.800000  9.094006
## 6 2.0016097 13.900897 32.200000     34.35429  9.162500  9.843287
##   count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1            0         0          0          0          0          0
## 2            0         0          0          0          0          0
## 3            0         0          0          0          0          0
## 4            0         0          0          0          0          0
## 5            0         0          0          0          0          0
## 6            0         0          0          0          0          0
##   v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw    c.can      lat
## 1        0            0  0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2        0            0  0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3        0          100  0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4        0            0  0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5        0            0  0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6        0            0  0.0000000 0.000000 0.00000000 22.17338 11.70736
##         lon    utm_x   utm_y      b.re     g.re     r.re  rededge   nir.re
## 1 -1.982213 610922.2 1293456  819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430  851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580  839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397  870.4163 1422.145 2121.354 2550.309 3316.303
# remove unnecessary data
rm(Plot.data, plots)

EXTRACT PIXEL-VALUES FROM LANDSAT8 IMAGE

# for further analysis, we need to extract pixel values for the plots
data[bands_l8] <- as.data.frame(extract(landsat, cbind(data$utm_x, data$utm_y),
                                        method='bilinear'))

# see how data looks now
head(data)
##   plot.id cluster plot count.ha    ba.ha    agb.ha   bgb.ha   agc.ha
## 1     101       1    1      120 2.774560  8.565222 2.355436 4.025654
## 2     102       1    2       20 1.517970  7.105600 1.954040 3.339632
## 3     103       1    3      240 2.021866  5.727592 1.575088 2.691968
## 4     104       1    4       20 2.202256 11.602759 3.190759 5.453297
## 5     105       1    5       30 2.387948 10.386064 2.856168 4.881450
## 6     106       1    6       40 3.362431 15.486342 4.258744 7.278581
##      bgc.ha      v.ha  dbh.mean dbh.qdr.mean    h.mean  h.loreys
## 1 1.1070550  7.449620 16.683333     18.56205  6.231667  6.392793
## 2 0.9183987  6.245248 30.900000     31.63927  9.350000  9.795736
## 3 0.7402913  4.979081  8.583333     16.99223  3.882083  5.863372
## 4 1.4996566 10.312879 37.400000     37.57286 10.900000 11.149689
## 5 1.3423988  9.120726 31.500000     32.84413  8.800000  9.094006
## 6 2.0016097 13.900897 32.200000     34.35429  9.162500  9.843287
##   count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1            0         0          0          0          0          0
## 2            0         0          0          0          0          0
## 3            0         0          0          0          0          0
## 4            0         0          0          0          0          0
## 5            0         0          0          0          0          0
## 6            0         0          0          0          0          0
##   v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw    c.can      lat
## 1        0            0  0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2        0            0  0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3        0          100  0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4        0            0  0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5        0            0  0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6        0            0  0.0000000 0.000000 0.00000000 22.17338 11.70736
##         lon    utm_x   utm_y      b.re     g.re     r.re  rededge   nir.re
## 1 -1.982213 610922.2 1293456  819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430  851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580  839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397  870.4163 1422.145 2121.354 2550.309 3316.303
##           b        g        r      nir    swir1    swir2
## 1  951.6492 1394.592 1817.576 3142.439 3840.167 2950.875
## 2 1204.6968 1756.546 2293.718 3413.576 4432.911 3835.910
## 3  973.9948 1424.100 1842.532 3055.005 3866.137 3044.507
## 4 1154.8354 1697.474 2218.241 3519.053 4218.354 3540.430
## 5  990.9266 1443.459 1904.928 3198.101 3982.312 3107.111
## 6 1083.4366 1569.858 2078.348 3241.671 4099.756 3256.872
# remove unnecessary data
rm(Plot.data, plots)

ACCURACY ASSESSMENT BY CROSS-VALIDATION FOR RAPIDEYE

# test accuracy for single variable
# set "k", method and x-variables
# test several values of k
k <- 3
method <- "msn"
yvar <- "agb.ha"
xvars <- bands_re
graphics.off()
cv.plot(data, yvar, xvars, k, method)
## [1] "agb.ha"
## BIAS: 0.103 (0.55%)
## RMSE: 11.7 (62.4%)
## Pseudo R.squared: 0.375
## [1]  0.103  0.550 11.700 62.400  0.375
# accuracy statistics are printed on console
# plots shows observed vs. predicted values

# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_re
par(mfrow=c(2,2)) #four variables are plotted in four panels
res <- c()
for(i in 1:length(yvars)){
  res <- rbind(res, cv.plot(data, yvars[i], xvars, k, method)) 
}
## [1] "ba.ha"
## BIAS: -0.163 (-3.05%)
## RMSE: 2.79 (52.2%)
## Pseudo R.squared: 0.468
## [1] "agb.ha"
## BIAS: 0.103 (0.55%)
## RMSE: 11.7 (62.4%)
## Pseudo R.squared: 0.375
## [1] "h.mean"
## BIAS: 0.0787 (1.29%)
## RMSE: 2.4 (39.4%)
## Pseudo R.squared: 0.00517
## [1] "c.can"
## BIAS: 0.298 (1.1%)
## RMSE: 13.8 (51.2%)
## Pseudo R.squared: 0.458
# organize results
res <- cbind(yvars, res)
res <- as.data.frame(res)
names(res) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res
##   Attribute   Bias Bias (%) RMSE RMSE (%) R.squared
## 1     ba.ha -0.163    -3.05 2.79     52.2     0.468
## 2    agb.ha  0.103     0.55 11.7     62.4     0.375
## 3    h.mean 0.0787     1.29  2.4     39.4   0.00517
## 4     c.can  0.298      1.1 13.8     51.2     0.458

ACCURACY ASSESSMENT BY CROSS-VALIDATION FOR LANDSAT8

# test accuracy for single variable
# set "k", method and x-variables
# test several values of k
k <- 3
method <- "msn"
yvar <- "agb.ha"
xvars <- bands_l8
graphics.off()
cv.plot(data, yvar, xvars, k, method)
## [1] "agb.ha"
## BIAS: 0.0404 (0.216%)
## RMSE: 11.1 (59.2%)
## Pseudo R.squared: 0.424
## [1]  0.0404  0.2160 11.1000 59.2000  0.4240
# accuracy statistics are printed on console
# plots shows observed vs. predicted values

# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_l8
par(mfrow=c(2,2)) #four variables are plotted in four panels
res <- c()
for(i in 1:length(yvars)){
  res <- rbind(res, cv.plot(data, yvars[i], xvars, k, method)) 
}
## [1] "ba.ha"
## BIAS: 0.0793 (1.48%)
## RMSE: 2.52 (47.1%)
## Pseudo R.squared: 0.564
## [1] "agb.ha"
## BIAS: 0.0404 (0.216%)
## RMSE: 11.1 (59.2%)
## Pseudo R.squared: 0.424
## [1] "h.mean"
## BIAS: -0.0183 (-0.3%)
## RMSE: 2.17 (35.6%)
## Pseudo R.squared: 0.0732
## [1] "c.can"
## BIAS: 0.286 (1.06%)
## RMSE: 13.4 (49.7%)
## Pseudo R.squared: 0.484
# organize results
res <- cbind(yvars, res)
res <- as.data.frame(res)
names(res) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res
##   Attribute    Bias Bias (%) RMSE RMSE (%) R.squared
## 1     ba.ha  0.0793     1.48 2.52     47.1     0.564
## 2    agb.ha  0.0404    0.216 11.1     59.2     0.424
## 3    h.mean -0.0183     -0.3 2.17     35.6    0.0732
## 4     c.can   0.286     1.06 13.4     49.7     0.484

PREDICT ATTRIBUTES TO PIXELS FOR RAPIDEYE

# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_re
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)

# convert RapidEye image to data frame
rapideye.30m.df <- as.data.frame(rapideye.30m)

# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=5, 0))
names(r160) <- bands_re
rapideye.30m.df <- rbind(r160, rapideye.30m.df)

# predict values for pixels
pred <- predict(msn, newdata=rapideye.30m.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above 


#RapidEye
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image
# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- rapideye.30m
projection(agb_ha) <- projection(rapideye.30m)

# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- rapideye.30m
projection(cc) <- projection(rapideye.30m)

# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")

# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 15.66
round(cellStats(cc, stat='mean'), 2)
## [1] 23.76
# should be close to the field based estimate

PREDICT ATTRIBUTES TO PIXELS FOR LANDSAT8

# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_l8
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)

# convert Landsat image to data frame
landsat.df <- as.data.frame(landsat)

# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=6, 0))
names(r160) <- bands_l8
landsat.df <- rbind(r160, landsat.df)

# predict values for pixels
pred <- predict(msn, newdata=landsat.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above 



#Lansaat
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image

# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- landsat
projection(agb_ha) <- projection(landsat)

# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- landsat
projection(cc) <- projection(landsat)



# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")

# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 16.13
round(cellStats(cc, stat='mean'), 2)
## [1] 23.42
# should be close to the field based estimate